\documentclass[11pt]{article}
\usepackage[margin=0.8in]{geometry}                % See geometry.pdf to learn the layout options. There are lots.
\geometry{letterpaper}                   % ... or a4paper or a5paper or ... 
%\geometry{landscape}                % Activate for for rotated page geometry
%\usepackage[parfill]{parskip}    % Activate to begin paragraphs with an empty line rather than an indent

\usepackage[colorlinks=true]{hyperref}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{epstopdf}

\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png}

\title{Project 3 Tasks}
\author{Saurabh V. Pendse (ID: 001026185, Unity ID : svpendse)}
\date{\today}
%\date{}                                           % Activate to display a given date or no date

\begin{document}
\maketitle
\section{Task 1}
\begin{displaymath}
f_{X}(x) = \frac{\alpha k^{\alpha}}{1 - \left ( \frac{k}{p} \right )^{\alpha}} x^{-\alpha - 1} \quad k \leq x \leq p, 0 < \alpha < 2
\end{displaymath}

\subsection{Mean} 
\begin{eqnarray}
E[X] &=& \int_{-\infty}^{\infty} x f_{X} (x) dx \\
&=& \int_{k}^{p} x \frac{\alpha k^{\alpha}}{1 - \left ( \frac{k}{p} \right )^{\alpha}} x^{-\alpha - 1} dx \\
&=& \frac{\alpha k^{\alpha}}{1 - \left ( \frac{k}{p} \right ) ^{\alpha}} \left [ \frac{x ^{-\alpha +1 }}{-\alpha +1} \right ]_{k} ^{p} \\
&=& \frac{\alpha k^{\alpha}}{\left ( 1 - \left ( \frac{k}{p} \right ) ^{\alpha} \right ) (1-\alpha)} (p^{1-\alpha} - k^{1-\alpha}) \\
\therefore E[X] &=& \frac{k^{\alpha}}{\left ( 1 - \left (\frac{k}{p} \right )^{\alpha} \right )} \cdot \left ( \frac{\alpha}{\alpha - 1} \right ) \cdot \left ( \frac{1}{k^{\alpha - 1}} - \frac{1}{p^{\alpha - 1}} \right )
\end{eqnarray}

The parameters specified are $k = 332$, $p = 10^{10}$, $\alpha = 1.1$.
\begin{eqnarray}
\therefore E[X] &=& \frac{332^{1.1}}{\left ( 1 - \left (\frac{332}{10^{10}} \right )^{1.1} \right )} \cdot \left ( \frac{1.1}{1.1 - 1} \right ) \cdot \left ( \frac{1}{332^{1.1 - 1}} - \frac{1}{10^{10(1.1 - 1)}} \right ) \\
&=& \mathbf{2999.4}
\end{eqnarray}

\subsection{Variance}
\begin{eqnarray}
E[X^{2}] &=& \int_{-\infty}^{\infty} x^{2} f_{X} (x) dx \\
&=& \int_{k}^{p} x^{2} \frac{\alpha k^{\alpha}}{1 - \left ( \frac{k}{p} \right )^{\alpha}} x^{-\alpha - 1} dx \\
&=& \frac{\alpha k^{\alpha}}{1 - \left ( \frac{k}{p} \right ) ^{\alpha}} \left [ \frac{x ^{-\alpha +2 }}{-\alpha +2} \right ]_{k} ^{p} \\
&=& \frac{\alpha k^{\alpha}}{\left ( 1 - \left ( \frac{k}{p} \right ) ^{\alpha} \right ) (2-\alpha)} (p^{2-\alpha} - k^{2-\alpha}) \\
\therefore E[X^{2}] &=& \frac{k^{\alpha}}{\left ( 1 - \left (\frac{k}{p} \right )^{\alpha} \right )} \cdot \left ( \frac{\alpha}{\alpha - 2} \right ) \cdot \left ( \frac{1}{k^{\alpha - 2}} - \frac{1}{p^{\alpha - 2}} \right )
\end{eqnarray}

Thus, the variance $\sigma_{X}^{2}$ is given by : 
\begin{eqnarray}
\nonumber \sigma_{X}^{2} &=& E[X^{2}] - E[X]^{2} \\
\nonumber &=& \frac{k^{\alpha}}{\left ( 1 - \left (\frac{k}{p} \right )^{\alpha} \right )} \cdot \left ( \frac{\alpha}{\alpha - 2} \right ) \cdot \left ( \frac{1}{k^{\alpha - 2}} - \frac{1}{p^{\alpha - 2}} \right ) -
\left ( \frac{k^{\alpha}}{\left ( 1 - \left (\frac{k}{p} \right )^{\alpha} \right )} \cdot \left ( \frac{\alpha}{\alpha - 1} \right ) \cdot \left ( \frac{1}{k^{\alpha - 1}} - \frac{1}{p^{\alpha - 1}} \right ) \right ) ^{2}
\end{eqnarray}

For the given parameters,
\begin{eqnarray}
E[X^{2}] &=& \frac{332^{1.1}}{\left ( 1 - \left (\frac{332}{10^{10}} \right )^{1.1} \right )} \cdot \left ( \frac{1.1}{1.1 - 2} \right ) \cdot \left ( \frac{1}{332^{1.1 - 2}} - \frac{1}{10^{10(1.1 - 2)}} \right ) \\
&=& 7.2511e^{11} \\
\therefore \sigma_{X}^{2} &=& E[X^{2}] - E[X]^{2} \\
&=& 7.2511e^{11} - 2999.4^{2} \\
&=& \mathbf{7.2510e^{11}}
\end{eqnarray}

\section{Task 2}

\begin{enumerate}
\item Distributions whose tail declines like a power law, which means that the probability of extremely large observations is non-negligible are known as heavy tailed distributions. Let $X$ be a random variable with CDF $F(x) = P[X \leq x]$ and complementary CDF $\overline{F}(x) = 1 - F(x) = P[X > x]$. The distribution is said to be heavy-tailed if
\begin{equation}
\overline{F}(x) \varpropto c x^{-\alpha} \quad 0 < \alpha < 2
\end{equation}
for some positive constant $c$, where $a(x) \varpropto b(x)$ means $\lim_{x\longrightarrow \infty} \frac{a(x)}{b(x)} = 1$. If $F(x)$ is heavy tailed, then $X$ shows very high variability. In particular, $X$ has infinite variance, and if $\alpha \leq 1$, $X$ has infinite mean. For example, such distributions have been found to describe the lengths of bursts in network traffic and the sizes of files in some systems.
\item An $\alpha-$Stable distribution is a distribution with four parameters: $\alpha$, a location parameter (analogous to the mean), a scale parameter (analogous to the standard deviation), and a skewness parameter. Based on the value of the last parameter, the distribution can be either skewed or symmetric. An $\alpha$-Stable distribution has a bell-shaped body much like the Normal distribution, but it has much heavier tails. In fact, the $\alpha$-Stable distribution has power-law tails that follow the same $\alpha$ as that of the distribution $F$ from which the original observations were drawn.

$\alpha$-Stable distributions are introduced to analyze the behavior of the sample mean in case of random variables with infinite variance. Since the Central Limit Theorem only applies to sums of random variables with finite variance, it cannot be used in case of heavy tailed distributions. Let $X_{i}$ be i.i.d random variables drawn from some distribution $F$ that is heavy tailed with tail index  1 < $\alpha$ < 2. Then, 
\begin{equation}
A_{n} = \frac{1}{n} \sum_{i=1}^{n} X_{i}
\end{equation}
Now, if we define
\begin{equation}
Z_{n} = n^{1-\frac{1}{\alpha}} (A_{n} - \mu)
\end{equation}
then $Z_{n}$ converges to $S_{\alpha}$, where $S_{\alpha}$ is an $\alpha$-Stable distribution.

\item The paper shows that even at steady state, the sample mean of a set of i.i.d random variables drawn from some distribution $F$ that is heavy tailed with tail index $1 < \alpha  < 2$ will be distributed according to a heavy tailed distribution, and hence will show high variability. Thus, the likelihood of an erroneous measurement of $\mu$ is still non-negligible. Equivalently, the simulation still behaves erratically. 

A swamping observation is defined as one whose presence causes the estimate of $\mu$ to be at least twice as large as it should be. The significance of a swamping observation is that if we happen to encounter such an observation in our simulation, then the observed mean of the input will have a relative error of at least $100\%$.

\item The authors claim that  a simulation may always be in a transient state if the random variable $X$ of interest follows a heavy tailed distribution and the $\alpha$ parameter for the corresponding $\alpha$-Stable distribution is less than $1.7$, since the number of samples to achieve steady state is very high for $\alpha < 1.7$. 

Thus, it is not feasible in any reasonable amount of time to observe steady state in such a simulation. Over a reasonable time scale, such a simulation is always in a transient state.

\item The authors show that a difficult problem arises when simulating systems with heavy tailed workloads. In such systems, steady-state behavior can be elusive, because average-case behavior depends on the presence of many small observations as well as a few large observations. This problem has two implications : 
\begin{enumerate}
\item It may not be possible in any reasonable time to achieve steady state.
\item Simulations may still behave erratically even at steady state.
\end{enumerate}
The authors suggest that one should carefully consider the stability of the simulation results when running simulations using heavy tailed workloads with $\alpha < 1.7$. In such cases, a fruitful approach may be to incorporate time scale explicitly into the stability analysis using techniques from order statistics.
\end{enumerate}

\section{Task 3}
\begin{figure}
	\begin{subfigure}[b]{0.48\linewidth}
                \centering
                \includegraphics[width=\linewidth]{../task3/system_mm3.eps}
                \caption{Task 3 : Average customer system time vs $\rho$ (M/M/3)}
                \label{fig:task3a}
        \end{subfigure}
        \begin{subfigure}[b]{0.48\linewidth}
                \centering
                \includegraphics[width=\linewidth]{../task3/system_mg3.eps}
                \caption{Task 3 : Average customer system time vs $\rho$ (M/G/3)}
                \label{fig:task3b}
        \end{subfigure}        
        
                \begin{subfigure}[b]{0.48\linewidth}
                \centering
                \includegraphics[width=\linewidth]{../task3/system.eps}
                \caption{Task 3 : Average customer system time vs $\rho$ (M/M/3 and M/G/3)}
                \label{fig:task3c}
        \end{subfigure}        
        \caption{Results for Task 3}
	\label{fig:task3}
\end{figure}

The results are shown in the Figures \ref{fig:task3a}, \ref{fig:task3b} and \ref{fig:task3c} respectively. We observe that both the M/M/3 and the M/G/3 systems exhibit an increasing trend in the average customer system time with increasing values of $\rho$. The magnitude of the system times, however, is much higher in case of the M/G/3 system, especially for higher values of $\rho$, where the values are about $4$ times as high as the ones for the M/M/3 system. Also, the confidence intervals are much wider spread for the M/G/3 system as compared to the M/M/3 system. This is due to the high variance of the pareto distributed service times. Also, the range of values of average system time for the M/G/3 system as a function of $\rho$ varied considerably between disparate batch runs. This behavior is again attributed to the high variance of the pareto distributed service times.

Also, the SJF service discipline results in a lower average customer system times as compared to the FCFS service discipline for all values of $\rho$ in both the M/M/3 and the M/G/3 systems. The difference is more evident for higher values of $\rho$. Based on the material covered in class, this is the expected behavior, since for the M/G/3 system the service times follow a bounded pareto distribution, which results in much higher first and second moments as compared to the same for the exponentially distributed service times for the M/M/3 system. 
%Note that for a M/G/3 FCFS system, 
%\begin{equation}
%E[T] = E[X] + E[W] = E[X] + \frac{\lambda E[X^{2}]}{2(1-\rho)} \quad \rho = \frac{\lambda}{3\mu}
%\end{equation}
%For an M/G/3 SJF system,
%\begin{eqnarray}
%E[W(x)] &=& \frac{\lambda E[X^{2}]}{2(1-\rho(x))^{2}} \quad \textnormal{($x$ is the service time)}\\
%E[T] &=& E[X] + \int_{0}^{\infty} E[W(x)] f_{X}(x) dx
%\end{eqnarray}
%Here $\rho(x) = \lambda \int_{0}^{x} t f(t) dt$ denotes the load made up by jobs of size less than $x$ only.

This results in an increased average customer system time for the M/G/3 system as compared to the M/M/3 system.
\section{Task 4}

\begin{figure}
	\begin{subfigure}[b]{0.48\linewidth}
                \centering
                \includegraphics[width=\linewidth]{../task4/slowdown_mm1.eps}
                \caption{Task 4 : Slowdown vs Percentile of service time distribution (M/M/1)}
                \label{fig:task4a}
        \end{subfigure}
        \begin{subfigure}[b]{0.48\linewidth}
                \centering
                \includegraphics[width=\linewidth]{../task4/slowdown_mg1.eps}
                \caption{Task 4 : Slowdown vs Percentile of service time distribution (M/G/1)}
                \label{fig:task4b}
        \end{subfigure}                
        \caption{Results for Task 4}
	\label{fig:task4}
\end{figure}

The results for the M/M/1 and M/G/1 systems are shown in the Figures \ref{fig:task4a} and \ref{fig:task4b} respectively. For both the systems, the slowdown metric is very high for customers with small service times, whereas it is very low for customers with high service times. This is true for both the FCFS and SJF service disciplines.  It is worth mentioning that the range of values of slowdown for the M/G/1 system varied considerably between disparate runs. This behavior is attributed to the high variance of the pareto distributed service times.

For the M/M/1 system, we observe a sharp decrease in the slowdown metric from the lower range to the higher range bins. For the M/M/1 FCFS system, the average waiting time is given by 
\begin{eqnarray}
\overline{W}(x) &=& \frac{\rho^{2}}{\lambda(1-\rho)} \\
\therefore S(x) &=& \frac{W(x)}{x} = \frac{\rho^{2}}{x\lambda(1-\rho)}
\end{eqnarray}

Thus, the slowdown metric should be inversely proportional to the service time $x$. In case of the FCFS M/M/1 system, some of the customers with small service times have to wait until all the large jobs ahead of them in the queue are serviced. Hence the slowdown metric i.e. the waiting time per unit of service, on an average is very high for customers with small service times, much higher than the same for SJF. The same is observed empirically in the Figure \ref{fig:task4a}.

It is particularly interesting to discuss about the SJF M/M/1 system. Although SJF discriminates based on the service discipline implying that customers with small service times have a lower waiting time, but the ratio of waiting time by the service time is quite high (although lower than the same in FCFS). i.e. the such customers have to wait a lot more per unit of service. This is consistent with the theoretically observed result for the M/M/1 SJF system, wherein the waiting time, as a function of the service time is given by : 

\begin{eqnarray}
\overline{W}(x) &=& \frac{\frac{\lambda}{\mu^{2}}}{\left ( 1 - \frac{\lambda}{\mu} ( 1 - e^{-\mu x}(1+\mu x)) \right )^{2}} \\
\therefore S(x) &=& \frac{\overline{W}(x)}{x} = \frac{\frac{\lambda}{\mu^{2}}}{x \left ( 1 - \frac{\lambda}{\mu} ( 1 - e^{-\mu x}(1+\mu x)) \right )^{2}}
\end{eqnarray}

Thus, we find that $S(x)$ is inversely proportional to $x$ i.e. customers with a smaller service time experience a higher slowdown, which is consistent with the empirical results.  The Figure \ref{fig:task4a} also shows that the SJF discipline results in a lower slowdown as compared to the FCFS discipline.

A similar argument can be made for the M/G/1 FCFS and SJF systems as well. In case of the M/G/1 FCFS system, the expected waiting time is given by : 
\begin{equation}
E[W(x)] = \frac{\lambda E[X^{2}]}{2(1-\rho)}
\end{equation}
which explains the decreasing trend of the slowdown $S(x) = \frac{W(x)}{x}$. For a M/G/1 SJF system, the expected waiting time is given by : 
\begin{equation}
E[W(x)] = \frac{\lambda E[X^{2}]}{2(1-\rho(x))^{2}}
\end{equation}

Here $\rho(x) = \lambda \int_{0}^{x} t f(t) dt$ is the fraction of the total load made up of just jobs of size $<x$. Here $f(t)$ is the PDF of the service times. 
While SJF favors short jobs, even a short job still has to wait for the server to first free up. Thus the waiting time of a job of size $x$ still includes the excess term, involving $E[X^{2}]$ seen in FCFS. The only difference is that now we have $(1-\rho(x))^{2}$ in the denominator rather than $(1-\rho)$. When a job of size $x$ arrives, it first has to wait for the server to finish serving the current job (the residual). Then the job $x$ has to wait behind those jobs in the queue whose size are $< x$, these contribute a $(1-\rho(x))$ factor in the denominator. Finally the job $x$ has to wait behind all future arrivals of size $x$ during its waiting time, these contribute another $(1-\rho(x))$ factor. In case of $FCFS$, the job $x$ does not have to wait behind future arrivals.

Here, we observe a decreasing trend (with intermediate fluctuations) in the slowdown metric with increasing bin numbers, as compared to the non-linear decrease for the M/G/1 system. However, the magnitude of the slowdown metric is much higher as compared to the M/M/1 system, especially for the lower range bins. The slowdown for the FCFS service discipline is higher than that for the SJF service discipline. Also, the slowdown output is much noisier as compared the the M/M/1 results, with frequent spikes due to the heavy tailed pareto distribution. 

Based on these plots, it can be said that neither the FCFS nor the SJF service discipline is fair in its nature.  The slowdown of the processor discipline discussed in class i.e. the round-robin scheduling discipline remains constant as a function of the service time. i.e.
\begin{equation}
S(x) = \frac{\rho}{1-\rho}
\end{equation}
Thus, the round-robin discipline exhibits a constant slowdown, which indicates that it is a fair service discipline, contrary to both FCFS and SJF.
\end{document}  
